Source code for hysop.numerics.fft.opencl_fft

# Copyright (c) HySoP 2011-2024
#
# This file is part of HySoP software.
# See "https://particle_methods.gricad-pages.univ-grenoble-alpes.fr/hysop-doc/"
# for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""
OpenCl backend base interface for fast Fourier Transforms.

:class:`~hysop.numerics.opencl_fft.OpenClFFTI`
:class:`~hysop.numerics.opencl_fft.OpenClFFTPlanI`
:class:`~hysop.numerics.opencl_fft.OpenClFFTQueue`
"""

from abc import abstractmethod
from hysop.tools.htypes import first_not_None, check_instance
from hysop.numerics.fft.fft import FFTQueueI, FFTPlanI, FFTI
from hysop.symbolic.relational import Assignment
from hysop.backend.device.opencl.opencl_array_backend import OpenClArrayBackend
from hysop.backend.device.opencl.opencl_array import OpenClArray
from hysop.backend.device.opencl.opencl_elementwise import (
    OpenClElementwiseKernelGenerator,
)
from hysop.backend.device.opencl.opencl_kernel_launcher import (
    OpenClKernelListLauncher,
    OpenClKernelLauncherI,
)
from hysop.backend.device.opencl.opencl_copy_kernel_launchers import (
    OpenClCopyBufferRectLauncher,
    OpenClFillKernelLauncher,
)
from hysop.backend.device.opencl.opencl_kernel_launcher import OpenClKernelListLauncher
from hysop.backend.device.opencl.autotunable_kernels.transpose import (
    OpenClAutotunableTransposeKernel,
)


[docs] class OpenClFFTQueue(FFTQueueI): """An opencl fft queue is just like a standard opencl queue.""" def __init__(self, queue, name="fft_planner"): self._launcher = OpenClKernelListLauncher(name=name) self._queue = queue def __iadd__(self, kernel): self._launcher += kernel return self
[docs] def execute(self, wait_for=None): return self._launcher(queue=self._queue, wait_for=wait_for)
[docs] class OpenClFFTPlanI(FFTPlanI, OpenClKernelLauncherI): """ Tag for FFT plans executing on OpenCL backend arrays. """ def __init__(self, **kwds): super().__init__(**kwds) self._name = "fft_plan"
[docs] @abstractmethod def execute(self, **kwds): pass
def __call__(self, **kwds): return self.execute(**kwds)
[docs] def global_size_configured(self): return True
[docs] class OpenClFFTI(FFTI): """ Abstract base for FFT interfaces targetting OpenCL backends. """ def __init__(self, cl_env, backend=None, allocator=None, queue=None, **kwds): from hysop.backend.device.opencl.opencl_array_backend import OpenClArrayBackend from hysop.backend.device.opencl.opencl_env import OpenClEnvironment if backend is None: if queue is None: queue = cl_env.default_queue backend = OpenClArrayBackend.get_or_create( cl_env=cl_env, queue=queue, allocator=allocator ) else: msg = "OpenCl environment does not match the one of the backend." assert backend.cl_env is cl_env, msg if allocator is not None: msg = "OpenCl allocator does not match the one of the backend." assert backend.allocator is allocator, msg if queue.context != cl_env.context: msg = "Queue does not match context:" msg += f"\n *Given context is {cl_env.context}." msg += f"\n *Command queue context is {queue.context}." raise ValueError(msg) check_instance(cl_env, OpenClEnvironment) check_instance(backend, OpenClArrayBackend) super().__init__(backend=backend, **kwds) self.cl_env = cl_env self.queue = queue self.kernel_generator = OpenClElementwiseKernelGenerator(cl_env=cl_env)
[docs] @classmethod def default_interface( cls, cl_env, backend=None, allocator=None, warn_on_allocation=False, error_on_allocation=True, warn_on_unaligned_output_offset=True, **kwds, ): """Get the default OpenCl FFT interface which is a GpyFFT interface.""" from hysop.numerics.fft.gpyfft_fft import GpyFFT return GpyFFT( cl_env=cl_env, backend=backend, allocator=allocator, warn_on_allocation=warn_on_allocation, error_on_allocation=error_on_allocation, warn_on_unaligned_output_offset=warn_on_unaligned_output_offset, **kwds, )
[docs] def new_queue(self, tg, name): return OpenClFFTQueue(queue=self.queue, name=name)
[docs] def plan_copy(self, tg, src, dst): src = self.ensure_buffer(src) dst = self.ensure_buffer(dst) launcher = OpenClCopyBufferRectLauncher.from_slices("copy", src=src, dst=dst) return launcher
[docs] def plan_accumulate(self, tg, src, dst): src = self.ensure_buffer(src) dst = self.ensure_buffer(dst) src, dst = self.kernel_generator.arrays_to_symbols(src, dst) expr = Assignment(dst, src + dst) launcher, _ = self.kernel_generator.elementwise_kernel("accumulate", expr) return launcher
[docs] def plan_transpose(self, tg, src, dst, axes): src = self.ensure_buffer(src) dst = self.ensure_buffer(dst) backend_kwds = { "cl_env": tg.backend.cl_env, "typegen": tg.op.typegen, "autotuner_config": tg.op.autotuner_config, "build_opts": tg.op.build_options(), } kernel = OpenClAutotunableTransposeKernel(**backend_kwds) transpose, _ = kernel.autotune( axes=axes, hardcode_arrays=True, is_inplace=False, input_buffer=src, output_buffer=dst, ) return transpose.build_launcher(in_base=src.base_data, out_base=dst.base_data)
[docs] def plan_fill_zeros(self, tg, a, slices): if not slices: return a = self.ensure_buffer(a) launcher = OpenClKernelListLauncher(name="fill_zeros") for slc in slices: lnc = OpenClFillKernelLauncher.from_slices( varname="buffer", backend=tg.backend, dst=a[slc], fill_value=0 ) launcher += lnc return launcher
[docs] def plan_compute_energy( self, tg, fshape, src, dst, transforms, mutexes, method="round", target=None ): (N, NS2, C2C, R2C, S) = super().plan_compute_energy( tg, fshape, src, dst, transforms ) # We do not forget the hermitian symmetry: # normally: E = 1/2 |Fi|**2 # but for a R2C transform: E = 1/2 |Fi|**2 + 1/2 |Fi*|**2 = 1 |Fi|**2 C = 2 ** (R2C - 1) import numpy as np import sympy as sm from hysop.tools.numerics import is_complex, is_fp, complex_to_float_dtype from hysop.symbolic import local_indices_symbols from hysop.symbolic.relational import Assignment, LogicalGT, Round from hysop.symbolic.misc import Cast, Select, CriticalCodeSection from hysop.symbolic.complex import cabs2 dim = src.ndim dtype = src.dtype ftype = dtype if is_fp(dtype) else complex_to_float_dtype(dtype) itype = np.int32 I = local_indices_symbols[:dim][::-1] (src,) = self.kernel_generator.arrays_to_symbols(src) dst, mutexes = self.kernel_generator.buffer_to_symbols(dst, mutexes) (k,) = self.kernel_generator.symbolic_tmp_scalars("k", dtype=itype) (E,) = self.kernel_generator.symbolic_tmp_scalars("E", dtype=ftype) K = self.kernel_generator.symbolic_tmp_scalars("K", dtype=ftype, count=dim) exprs = () assert len(K) == len(I) == len(N) == len(NS2) == len(C2C) for Ki, Ii, Ni, Nis2, c2c in zip(K, I, N, NS2, C2C): if c2c: rhs = Select(Ii, Ni - Ii, LogicalGT(Ii, Nis2)) else: rhs = Ii e = Assignment(Ki, rhs) exprs += (e,) if dim == 1: exprs += (Assignment(k, K[0]),) else: exprs += (Assignment(k, Cast(Round(sm.sqrt(np.dot(K, K))), itype)),) exprs += (Assignment(E, C * S * S * cabs2(src)),) action = Assignment(dst[k], dst[k] + E) exprs += (CriticalCodeSection(action, mutexes, k),) # this kernel is not optimized (we use a __global mutex for each wavenumber for now) launcher, _ = self.kernel_generator.elementwise_kernel( "compute_energy", *exprs, force_volatile=(dst,), max_candidates=1, debug=False, ) return launcher
[docs] @classmethod def ensure_buffer(cls, get_buffer): if callable(get_buffer): buf = get_buffer() else: buf = get_buffer return buf